library(topicmodels)
library(tm) #for pre-proccessing the data and create a document-term-matrics
## Warning: package 'tm' was built under R version 4.4.1
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 4.4.1
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::annotate() masks NLP::annotate()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
library(tidyr)
library(slam)
## Warning: package 'slam' was built under R version 4.4.1
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.4.1
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(textstem)
## Loading required package: koRpus.lang.en
## Loading required package: koRpus
## Loading required package: sylly
## For information on available language packages for 'koRpus', run
##
## available.koRpus.lang()
##
## and see ?install.koRpus.lang()
##
##
## Attaching package: 'koRpus'
##
## The following object is masked from 'package:readr':
##
## tokenize
##
## The following object is masked from 'package:tm':
##
## readTagged
library(readtext)
library(viridis) # color blind pallete.
## Loading required package: viridisLite
At this point, I was curious about the message: ‘The following object is masked from package:xxx’. I searched online and found that it appears when multiple attached packages export functions with the same name. When such a function is called, R uses the first one it finds on the search path, and the other is masked. In this case, MASS was attached after dplyr, so MASS::select() sits earlier on the search path and dplyr’s select() is masked when we simply call select(). How do we solve this? I learned from this website (https://stackoverflow.com/questions/39137110/what-does-the-following-object-is-masked-from-packagexxx-mean) that we can explicitly qualify a function with its package name and ::, for example dplyr::select().
# Load the positive reviews from the IMDB test split.
data_dir <- "/Users/yuxinkarlie/Library/Mobile Documents/com~apple~CloudDocs/TC columbia/Last semester/ORLA 6541 /Exercise_5/aclImdb/test/pos"
# Read every .txt file in the folder; encoding = "UTF-8" handles
# non-standard characters.
data_big <- readtext(file.path(data_dir, "*.txt"), encoding = "UTF-8")
# Sanity-check what was loaded: 12,500 reviews, two columns (doc_id, text).
dim(data_big)
## [1] 12500 2
head(data_big)
## readtext object consisting of 6 documents and 0 docvars.
## # A data frame: 6 × 2
## doc_id text
## * <chr> <chr>
## 1 0_10.txt "\"I went and\"..."
## 2 1_10.txt "\"My boyfrie\"..."
## 3 10_7.txt "\"If you had\"..."
## 4 100_10.txt "\"The finest\"..."
## 5 1000_9.txt "\"I was fort\"..."
## 6 10000_7.txt "\"Actor turn\"..."
Interestingly, I found that readtext() can load an entire folder of txt files in one call: we simply pass the folder path with a wildcard pattern and assign the result to a data object.
# Sample 100 random rows to keep the CPU time manageable.
data <- sample_n(data_big, 100)
head(data)
## readtext object consisting of 6 documents and 0 docvars.
## # A data frame: 6 × 2
## doc_id text
## * <chr> <chr>
## 1 7888_10.txt "\"After rece\"..."
## 2 5695_10.txt "\"BLACK WATE\"..."
## 3 12216_10.txt "\"Surface is\"..."
## 4 6723_7.txt "\"A young Ko\"..."
## 5 2180_8.txt "\"FIVE STAR \"..."
## 6 3095_10.txt "\"You will b\"..."
# Inspect the sampled reviews.
view(data)
# Build a tm corpus from the raw review text (Corpus/VectorSource are in tm).
corpus <- Corpus(VectorSource(data[["text"]]))
corpus
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 100
# Document-term matrix with standard pre-processing applied while building:
# stemming, stop-word removal, and dropping numbers, punctuation and
# very short words.
dtm_control <- list(
  stemming = TRUE,
  stopwords = TRUE,
  minWordLength = 3,
  removeNumbers = TRUE,
  removePunctuation = TRUE
)
text_DTM <- DocumentTermMatrix(corpus, control = dtm_control)
dim(text_DTM)
## [1] 100 3512
text_DTM
## <<DocumentTermMatrix (documents: 100, terms: 3512)>>
## Non-/sparse entries: 9062/342138
## Sparsity : 97%
## Maximal term length: 38
## Weighting : term frequency (tf)
Show the tf-idf (term frequency–inverse document frequency) for each term.
# tf-idf per term, computed directly on the sparse triplet representation
# (text_DTM$i, $j, $v) using slam's row_sums/col_sums.
# Mean within-document relative frequency of each term...
mean_tf <- tapply(text_DTM$v / row_sums(text_DTM)[text_DTM$i], text_DTM$j, mean)
# ...times log2(N / number of documents containing the term).
idf <- log2(nDocs(text_DTM) / col_sums(text_DTM > 0))
term_tfidf <- mean_tf * idf
summary(term_tfidf)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01336 0.03119 0.04400 0.05867 0.07249 0.47456
plot(density(term_tfidf))
Choose a cutoff (alpha) near the median to cut down the total number of terms, removing terms that are too unique as well as terms that appear too many times (both tails of the distribution).
# Trim the DTM: keep only terms whose tf-idf is at least alpha (roughly the
# median of the distribution above) and drop any document left with no terms.
alpha <- 0.04
keep_docs <- row_sums(text_DTM) > 0
keep_terms <- term_tfidf >= alpha
text_DTM_trimmed <- text_DTM[keep_docs, keep_terms]
dim(text_DTM_trimmed)
## [1] 100 2054
Randomly shuffle the dataset and split it into 10 approximately equal-sized subsets (folds); each subset is referred to as a fold. It took about 5–10 minutes to run the following.
# Cross validation
# VEM control settings for topicmodels::CTM. keep = 0 discards intermediate
# likelihoods. NOTE(review): seed is drawn from the clock, so individual CTM
# fits are not reproducible even though set.seed() is called below.
control_CTM_VEM <- list(
  estimate.beta = TRUE, verbose = 0, prefix = tempfile(), save = 0, keep = 0,
  seed = as.integer(Sys.time()), nstart = 1L, best = TRUE,
  var = list(iter.max = 100, tol = 10^-6),
  em = list(iter.max = 500, tol = 10^-4),
  # BUG FIX: the conjugate-gradient tolerance was 10^5, which effectively
  # disables the CG convergence check; the package default is 10^-5.
  cg = list(iter.max = 100, tol = 10^-5)
)
# use 10-fold CV to determine k!
# randomly divide the data into 10 folds.
set.seed(100)
# Candidate numbers of topics to evaluate.
topics <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 15)
seed <- 2  # NOTE(review): assigned but never used below — confirm later chunks need it
D <- length(data$text)
# Assign each of the D documents to one of 10 folds as evenly as possible.
# FIX: was rep(seq_len(10), ceiling(D)), which built a needlessly long vector;
# ceiling(D / 10) repetitions already cover all D positions, and the first D
# elements (hence the sampled folding) are identical.
folding <- sample(rep(seq_len(10), ceiling(D / 10))[seq_len(D)])
table(folding)
## folding
## 1 2 3 4 5 6 7 8 9 10
## 10 10 10 10 10 10 10 10 10 10
## write a loop to automatically output the perplexity
# Hold-out perplexity for every (k, fold) pair. The result matrix is
# preallocated instead of grown with rbind()/cbind() inside the loop
# (growing objects copies them on every iteration). Layout matches the
# original: rows = folds (1..10), columns = candidate k values.
perp_by_col <- matrix(NA_real_, nrow = 10, ncol = length(topics))
for (ki in seq_along(topics)) {
  k <- topics[ki]
  for (chain in seq_len(10)) {
    # Fit the CTM on the nine training folds...
    training <- CTM(text_DTM_trimmed[folding != chain, ], k = k,
                    control = control_CTM_VEM)
    # ...then re-run inference on the held-out fold, initialized from
    # the trained model, and record its perplexity.
    testing <- CTM(text_DTM_trimmed[folding == chain, ], model = training,
                   control = control_CTM_VEM)
    perp_by_col[chain, ki] <- perplexity(testing)
  }
}
Plot the perplexity for each of the 10 folds, and then plot the average perplexity.
# Plot perplexity: one dashed line per fold, plus the across-fold average.
transpose <- t(perp_by_col)  # rows = k values, columns = folds
# FIX: was rainbow(9), which recycled so folds 1 and 10 shared a colour.
fold_cols <- rainbow(10)
matplot(transpose, type = "l", col = fold_cols, lty = 2, lwd = 2,
        ylab = "Perplexity", xlab = "K",
        main = "CTM-10-fold cross validation", xaxt = "n")
# FIX: cex.axis (not cex) is the parameter that scales axis labels in axis().
axis(1, at = 1:10, labels = paste0("k=", c(2:10, 15)), cex.axis = 0.5)
perp_by_col_mean <- colMeans(perp_by_col)
lines(perp_by_col_mean, col = "black", lwd = 4, lty = 1)
# FIX: legend previously labelled the 10 folds as fold=2..fold=10 (off by
# one) and drew the solid average line with a dashed legend key.
led <- c(paste0("fold=", 1:10), "Average")
legend("topright", led, lwd = 2, lty = c(rep(2, 10), 1),
       col = c(fold_cols, "black"), cex = 0.65)
abline(v = 4, col = "gray60", lty = 2)  # NOTE(review): x = 4 marks k=5 on this axis; text below picks k=6 — confirm intent
# Average Perplexity
{plot(perp_by_col_mean, pch = 20, ylab = "Perplexity", xlab = "K",
      main = "Average CTM-10-fold cross validation", xaxt = "n")
  axis(1, at = 1:10, labels = paste0("k=", c(2:10, 15)), cex.axis = 0.5)
  lines(perp_by_col_mean, lwd = 1, lty = 2, col = "red")}
I think I will choose k=6 here, since this is where the curve starts to flatten. But I don’t know why the original markdown chose k=9 (we have a similar curve shape).
# Multidimensional scaling of the per-document topic distributions.
# NOTE(review): despite the "Classical MDS" header, MASS::isoMDS() performs
# Kruskal's NON-metric MDS; classical (metric) MDS would be cmdscale().
# Input: N rows (objects) x p columns (variables),
# each row identified by a unique row name.
# `topics3` is defined elsewhere in the file — presumably the document-topic
# probability matrix from a fitted model; confirm against the earlier chunks.
d <- dist(topics3) # euclidean distances between the rows
fit <- isoMDS(d, k=2) # k is the number of output dimensions (2-D map)
## initial value 42.888319
## iter 5 value 27.033820
## iter 10 value 21.867575
## iter 10 value 21.858295
## iter 10 value 21.858295
## final value 21.858295
## converged
# isoMDS() fails if any two rows are identical (zero distance between them).
# Run the below only if there are identical rows:
#library(vegan)
#fit <- vegan::metaMDS(comm = dist(topics))
fit # view results: $points holds the 2-D coordinates, $stress the final stress
## $points
## [,1] [,2]
## [1,] 1.019837183 0.444938214
## [2,] -0.639820204 -0.929885801
## [3,] -0.639709901 -0.926193594
## [4,] -0.262271037 1.081522839
## [5,] 1.016622341 0.438966195
## [6,] 0.677383006 -0.908037466
## [7,] -0.032618839 0.015532426
## [8,] -1.137952268 0.194366625
## [9,] 0.680671479 -0.909409151
## [10,] -0.639503123 -0.929851292
## [11,] 0.683304591 -0.909463975
## [12,] -0.258120692 1.080819277
## [13,] 0.678150375 -0.908599685
## [14,] -1.137203130 0.192073121
## [15,] -0.253510439 1.077124929
## [16,] -0.640457210 -0.929419868
## [17,] 1.014635679 0.444618015
## [18,] -1.139245783 0.192664309
## [19,] -0.639593823 -0.927068974
## [20,] -0.025580020 0.006779017
## [21,] -1.139128135 0.193631928
## [22,] 1.018521592 0.440783974
## [23,] 1.014882573 0.442695231
## [24,] 0.678053066 -0.909667816
## [25,] -1.138783437 0.194629846
## [26,] 1.019761681 0.445445967
## [27,] -0.251415543 1.074242541
## [28,] -0.252738845 1.078870986
## [29,] -0.639981485 -0.928085809
## [30,] 1.016583317 0.439006118
## [31,] 0.019115446 0.063056204
## [32,] -0.018422252 0.018055421
## [33,] -1.138627407 0.193477883
## [34,] 0.678226988 -0.911417802
## [35,] 0.677983370 -0.908945174
## [36,] 0.680034350 -0.909458457
## [37,] 1.019455547 0.441380742
## [38,] -1.141824929 0.192480057
## [39,] -1.140428404 0.195424847
## [40,] 0.677913161 -0.911336729
## [41,] 1.019093584 0.442785802
## [42,] -0.255557983 1.080171945
## [43,] 1.015711015 0.441638417
## [44,] 0.679356541 -0.910895682
## [45,] 1.016648427 0.443544810
## [46,] -0.255018498 1.078246749
## [47,] -1.142020561 0.195353262
## [48,] -0.639686157 -0.924927268
## [49,] -0.632417665 -0.911990063
## [50,] 1.018855331 0.440804815
## [51,] 0.676498171 -0.909486960
## [52,] 0.685113696 -0.910790665
## [53,] -1.141847595 0.195061576
## [54,] 1.015871854 0.440743774
## [55,] 0.677577766 -0.911426274
## [56,] -0.003862475 -0.025259907
## [57,] 0.682331277 -0.909852470
## [58,] -0.252547295 1.074087587
## [59,] -0.258434735 1.080264533
## [60,] 1.018514470 0.440518097
## [61,] -0.252888928 1.077589980
## [62,] 1.014618278 0.443660081
## [63,] -0.002661904 0.029649445
## [64,] 1.014885149 0.439433448
## [65,] -0.642436700 -0.926002538
## [66,] -0.260554560 1.079711530
## [67,] -0.254902612 1.075640169
## [68,] -0.636935043 -0.916273341
## [69,] -0.641390661 -0.937548345
## [70,] -1.139601125 0.192932599
## [71,] -0.635801633 -0.918325113
## [72,] 0.679648460 -0.910909653
## [73,] -0.064092534 0.025451587
## [74,] -0.636985823 -0.923031478
## [75,] 0.678173858 -0.907515384
## [76,] -0.098647078 0.228242622
## [77,] 0.679494834 -0.911069332
## [78,] -1.139193015 0.192580599
## [79,] 1.014389680 0.439411072
## [80,] -0.631156510 -0.909244614
## [81,] -0.640453374 -0.927824524
## [82,] -0.253329976 1.073970516
## [83,] -1.137905449 0.193158567
## [84,] -0.257425285 1.077560244
## [85,] 1.014996624 0.444473597
## [86,] -0.252318577 1.075906415
## [87,] -0.638360762 -0.922930564
## [88,] 1.014490791 0.442862420
## [89,] -0.250972274 1.075128203
## [90,] 0.682090252 -0.909167491
## [91,] -0.255775264 1.080591233
## [92,] -0.262255864 1.082933483
## [93,] 0.067947228 -0.018707736
## [94,] 1.015628209 0.441862902
## [95,] -0.629733418 -0.911504802
## [96,] -1.133468574 0.194583716
## [97,] -1.141390270 0.194196046
## [98,] -0.250824950 1.074603465
## [99,] -0.639993059 -0.930628012
## [100,] 1.016713850 0.440211787
##
## $stress
## [1] 21.8583
# Assemble the plotting frame. FIX: data.frame() is used instead of
# as.data.frame(cbind(...)) because cbind() builds a matrix and would silently
# coerce the numeric coordinates to character if Main_Topic is non-numeric.
plot_data <- data.frame(
  Coordinate1 = fit$points[, 1],
  Coordinate2 = fit$points[, 2],
  Main_Topic = main_topic3$Main_Topic,
  row.names = data$doc_id
)
# Base scatter plot of the two MDS coordinates.
(p1 <- ggplot(data = plot_data, aes(x = Coordinate1, y = Coordinate2)) + geom_point(size=2, shape=23))
The point labels in this plot overlap heavily, so we increase max.overlaps to keep the labels visible.
# Label every point; a generous max.overlaps keeps ggrepel from dropping labels.
(p2 <- p1 +
   geom_point() +
   geom_text_repel(aes(label = row.names(plot_data)),
                   size = 3, max.overlaps = 50))
# Colour the points by their dominant topic.
(p4 <- ggplot(plot_data, aes(x = Coordinate1, y = Coordinate2,
                             color = as.factor(Main_Topic))) +
   geom_point())
# Coloured points with labels; the lower max.overlaps lets ggrepel drop
# labels in crowded regions.
(p5 <- ggplot(plot_data, aes(Coordinate1, Coordinate2, color = as.factor(Main_Topic))) +
   geom_point() +
   geom_text_repel(aes(label = row.names(plot_data)), size = 3, max.overlaps = 20))
## Warning: ggrepel: 91 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Display the full text as row names
# Copy the topic table and replace its doc-id row names with the full review
# text, so each row can be read in context.
topics3_text <- topics3
rownames(topics3_text) <- data[["text"]]
view(topics3_text)
Loop through each column and print the top 3 most likely comments for each topic
# For each of the 6 topics, print the 3 documents with the highest topic
# probability. FIX: base order() replaces dplyr::arrange()/slice_head()
# because arrange() resets data-frame row names — and the row names here
# carry the full review text we want to print.
for (i in seq_len(6)) {
  # Indices of the 3 largest values in column i (descending).
  top_idx <- head(order(topics3_text[[i]], decreasing = TRUE), 3)
  top_rows <- topics3_text[top_idx, , drop = FALSE]
  cat("Top 3 comments for topic", i, ":\n")
  print(rownames(top_rows))
  cat("\n") # Add a newline for readability
}
## Top 3 comments for topic 1 :
## [1] "After getting thrown out of their last job and finding employment scarce in the United Kingdom, the six members of the Wonder Boys, better known as The Crazy Gang see an advertisement for employment in the gold strike town of Red Gulch in the Yukon Territory. It's from a newspaper clipping and on the back there's a story about Chamberlain saying the country better be prepared for war. Off they go to the Yukon and The Frozen Limits.<br /><br />By the way, it's case of misplaced Chamberlains. The clipping is forty years old and it refers to Joe Chamberlain and the Boer War rather than Neville in the current crisis. But that's typical of how things go for this crew. I can see Stan Laurel making the same mistake.<br /><br />Of course when they get there it's a ghost town inhabited only by young Jean Kent and her grandfather Moore Marriott. He's getting on in years and is a bit touched in the head. Marriott's got a gold mine that he's misplaced somewhere that he goes to in his sleep, that is when he's sleepwalking. The Gang better help him find that mine or otherwise pretty Ms. Kent won't marry stalwart trapper Anthony Hulme, but rather saloon owner Bernard Lee, a fate worse than death.<br /><br />This was my first exposure to the Crazy Gang and I can see both why they were so acclaimed in the UK and why they never made any impact across the pond. The jokes come fast and furious and then were a number of things that the Code in the USA just wouldn't allow. The jokes are also strictly topical British and a lot just wouldn't be gotten over here.<br /><br />The sight gags are universal, the final chase scene is worthy of anything that the Marx Brothers did in America. My suggestion is that if you watch The Frozen Limits, tape it if you have a working familiarity with British history and run it two or three times just to make sure you pick up everything. It will be worth it."
## [2] "Police officer Dirk Hendricks (Jamie Bartlett) files an amnesty application Alex Mpondo (Chiwetel Ejiofor).<br /><br />A member of the South African Parliament who cant remember the torture he once endured as a captive political activist.<br /><br />South African born attorney Sarah Barcant (Hilary Swank), meanwhile, returns to her homeland to represent Mpondo.<br /><br />As well as Steve Sizela, Mpondo's friend who was arrested along with him, and never heard from again.<br /><br />This film is one of the best films to come out about the South African regime of Aparthied, in the past.<br /><br />Everybody should watch it."
## [3] "This was my second experience of the Monkey Island series, the full seven years after I had been shown the first game. What was my response? \"Oh, great, we're playing a cartoon.\" I'm glad my brother shut me up then and played on, because the jokes caught my attention once again, as well as Armato's wonderful voice-acting of Guybrush - not to mention everyone else done well (I still think CMI's Elaine sounds better than EMI's). The cutscenes do well to illustrate something happening, and the art of both the game and cutscenes are excellent. When we found the CD with the originals, Secret and LeChuck's Revenge, we were both ecstatic and spent hours working through Revenge - one such moment was where we just sat down and blew half a day on it. However, CMI has to be the Monkey Island game I've played the most, especially for the return of swordfighting and combat on the high seas. That moment when you encounter Kenny and he tells you he's gone straight and then, \"I'm running guns!\" had both my brother and I in tears from laughter. And that's not the best part of the game, not by far."
##
## Top 3 comments for topic 2 :
## [1] "Ida Lupino is trapped in her own home by crazy Robert Ryan in \"Beware, My Lovely,\" a 1952 film from RKO. Lupino and Ryan did three films together and worked well as a team, both being consummate professionals and strong performers. In this film, based on a Broadway play called \"The Man,\" Lupino is a World War I widow who rents out a room in her home. She's very active and well-liked in her community and though her husband has been dead for two years, she's not ready to move on. The man who rents her room goes on vacation, and Lupino hires Robert Ryan to help her with some heavy-duty cleaning in the house. He's friendly enough to start, but later terrorizes her, locking her in the house, and not allowing her to answer the phone or the door, as he grows violent and more out of touch with reality.<br /><br />The character played by Ryan is shown in the beginning of the movie running away when he discovers a dead body in another house he's working in. It isn't clear whether or not he's the killer, since he seems surprised to see the body. He might be a split personality, as when his personality turns ugly toward Lupino, he seems to have no memory of his activities when he comes out of it. He doesn't know that he has the keys to Lupino's house in his pocket and doesn't know why he has tickets to a party that he bought from young children who came to the door.<br /><br />\"Beware, My Lovely,\" is a very suspenseful film, and the two leads give terrific performances. The tension builds to a very high level and ends in a way you're not expecting."
## [2] "36. THE HOSPITAL (comedy, 1971) A series of emergencies has gripped Manhattan Hospital. Patients are dying left and right due to overcrowded conditions, and a ineptitude staff. When a resident doctor is caught up in the death count the chief medical examiner, Dr. Bock (George C. Scott), is called in to investigate. Having worked as a doctor for too many years, and going through a mid-life crisis of his own, Dr. Bock finds the going tough. He decides to commit suicide. But then he meets Barbara (Diana Rigg), a young-hippie beauty. Whose keen insights on life help the depressed Bock.<br /><br />Critique: Black comedy features a 'tour-de-force' performance from veteran actor George C. Scott. He's good at playing high-strung, serious characters whose strict morals are severely tested. First half of the film unfolds like a melodrama, giving a pretty good account of hospital life, and the shambles they sometimes are. But then, as things look set for a dramatic climax it skews into slapstick comedy. If Paddy Chayefsky's script had maintained its dramatic feel I wonder if Scott would've walked out with another best Actor Oscar (he had previously won it, 'in-absentia', the year before). His breakdown (suicide) scene is one of the most gut-wrenchingly real in cinema history.<br /><br />QUOTE: Dr. Bock: \". . .last night I sat in my hotel room reviewing the shambles of my life and contemplating suicide. I said 'no Bock don't do it. You're a doctor, a healer, you're a necessary person, you're life is meaningful'. Then. . .I find out that one of my doctors was killed by a couple of nurses. . .how am I to sustain my feeling of meaningfulness in the face of this?\""
## [3] "i guess if they are not brother this film will became very common. how long were they can keep this ? if we were part,what should they do?so natural feelings,so plain and barren words.But I almost cried last night blood relationship brotherhood love knot film.in another word,the elder brother is very cute.if they are not brothers,they won't have so many forbidden factors,from the family、society、friends、even hearts of their own at the very beginning.The elder brother is doubtful of whether he is coming out or not at the beginning .maybe the little brother being so long time with his brother and even can't got any praise from his father,this made him very upset and even sad,maybe this is a key blasting fuse let him feel there were no one in the world loving him except his beloved brother. and i want to say ,this is a so human-natural feeling ,there is nothing to be shamed,you may fell in love your mother、brother、sister.Just a frail heart looking for backbone to rely on"
##
## Top 3 comments for topic 3 :
## [1] "Intruder in the Dust (1949) Dir: Clarence Brown <br /><br />Production: MGM<br /><br />Excellent 'Southern Gothic' tale, adapted from the Faulkner novel, about a black man, accused of the murder of a white man, who asks a young white boy he has befriended to help him prove his innocence. Lucas Beauchamp (Juano Hernandez) is something of an anomaly in this small town. He's a black man who owns the land he lives on and doesn't think much of the diseased social order that mostly keeps the peace here and in many similar small towns. So when Lucas is found holding a gun over the dead body of Vinson Gowrie, shot in the back no less, young Chick Mallison (Claude Jarman) (who Lucas once saved after Chick fell through the ice while hunting on his land) fears that the town finally has the chance to \"make Lucas a n*****.\" Arrested, and with a very real chance of being lynched before the night is through, Lucas reaches out to Chick for help, as the only person he knows \"not cluttered with notions\". Chick asks his Uncle John, a lawyer, to defend Lucas and while the man is initially bothered by his own notions he agrees and they race against the gathering mob to save Lucas' life.<br /><br />The film has an uncommon frankness for its time and is mostly free of moralizing. The lawyer character has a tendency to speak incredibly self-aware dialogue that sounds mostly like something from the printed page, but it has minimal impact on the tone. That's a credit to the rich characterization of everyone else. Juano Hernandez, who had mostly appeared in Oscar Micheaux films, is superb as the proud Lucas. Porter Hall as the murdered man's father, in maybe the best role I've ever seen him in, and Elizabeth Patterson as a plucky old lady sympathetic to Lucas' case, standout in support roles. The setting is perfectly realized. It is actually filmed in Oxford, Mississippi, Faulkner's hometown. 
Brown also uses the crowd in an effective way, it's always an anonymous mob against a single person (like Lucas when he's arrested or John when he's going up to his office), that is very threatening. Or the grotesquerie of the whole town gathering at the jailhouse to witness the lynching like it was a parade. Of note is an absolutely riveting scene when Chick and his friend Aleck go evidence gathering in a cemetery. Robert Surtees (THE BAD AND THE BEAUTIFUL, THIRTY SECONDS OVER TOKYO, BEN-HUR) shot the picture. <br /><br />*** 1/2 out of 4"
## [2] "\"What's his name?\" \"Loudon.\" \"Loudon what?\" \"Clear.\"<br /><br />That gag still gets me, TWENTY ONE years after the film was released.<br /><br />I loved the film back then and I love it today. I must have watched this a hundred times back in the day, and when I bought the DVD recently I could still remember some of the dialogue.<br /><br />Madonna plays Nikki Finn, a young woman jailed for a crime she didn't commit. When she gets out she decides to seek revenge.<br /><br />Griffin Dunne (whatever happened to him?), plays an attorney for his fiancée's father (John McMartin). The future father-in-law asks Loudon to take Nikki from prison to the bus station and to make sure she gets on the bus, as part of a supposed new public relations programme. A seemingly easy task, but there are complications aplenty, some funny dialogue, and some admittedly stupid-but-funny scenes along the way.<br /><br />Madonna has a stupid voice in this film, which until I was able to watch with subtitles made one or two lines of dialogue incomprehensible for me (hence only 8/10), but on the other hand I can't imagine her doing it in her normal voice.<br /><br />This film shows Madonna's comic side (too lacking these days, perhaps), and she genuinely is funny in the role. Dunne makes a great foil, while Haviland Morris is perfect as the uppity fiancée.<br /><br />Yes, it's predictable, yes, the jokes could be better, but I think this is a great film and will happily sit down and watch it 100 times more."
## [3] "Alejandro Amenabar, the young and talented Spanish director, clearly shows us he is a serious film maker. Anyone doubting it, should have a look at his latest film \"The Sea Inside\". This is a movie that has been rewarded with numerous accolades, not only in Spain, but throughout the world, wherever this wonderful movie has been shown.<br /><br />If you have not seen the film, perhaps you would like to stop here.<br /><br />Ramon Sampedro is a man confined to bed. Being quadriplegic, he depends on the kindness of strangers for everything. Since his accident, Ramon only thinks in one thing alone: how to end his life! This is the moral issue at the center of the story, based on the real Ramon Sampedro's life.<br /><br />Mr. Amenabar tells the story from Ramon's point of view. There is nothing here that is false or manipulative on his part. After all, he relies on facts that were well known in his country as this case became a \"cause celebre\" in favor of euthanasia, a theme that no one in that country wanted to deal with in Spain.<br /><br />With its background of being a predominantly Roman Catholic country, Spain has evolved into one of the most democratic societies in Europe, a distinction that is more notable because of its long years dominated by a dictator. Yet, in spite of the advances in that society, the idea of taking one's own life, is something not clearly understood by the majority of its citizens, who still considered this subject as something that could not be done in their country.<br /><br />Ramon Sampedro was a man that loved life. He lived an intense life as a young man when he enlisted as a sailor to discover the world. Having no money, this was the only way for him to see other lands, experience other cultures. Ramon's love affair with the sea, is something that people in Galicia learn to love from their childhood. Imagine how that same friendly sea is the one that takes away Ramon's life, as he knew it! 
In a second, Ramon goes from a vibrant young man into a vegetable!<br /><br />Ramon's family is shattered by the experience. Suddenly they must leave everything aside to take care of him at home. His brother and sister-in-law, are stoic people that deal with the situation as a matter of fact. Their lives become something of an afterthought, because Ramon's life comes first. They tend to the sick man without protesting, or blaming Ramon for the sacrifices they must make to keep him alive.<br /><br />That is why, in their minds, the Sampedros can't comprehend Ramon's wishes to end it all. Haven't they given up having a normal life to take care of him? This moral issue weighs heavily on these uncomplicated and simple people because in their minds, they are doing what came naturally.<br /><br />The second subject of the movie is the legal issue of the euthanasia and the well meaning people that suddenly enter Ramon's life in their desire to help him put an end to his suffering. There's Julia, the lawyer who is herself handicapped and suffers from a rare malady. There is Rosa, the fish cannery worker who becomes infatuated with Ramon. <br /><br />Javier Bardem, makes a brilliant Ramon Sampedro. His transformation is total. We don't doubt from one moment he is no one else but the paralyzed man on that bed. Mr. Bardem can only use his face in order to convey all the emotions trapped inside Ramon. Mr. Bardem makes this man real. This is perhaps Javier Bardem's best role of his career. He surpasses his own award winning performance as Reynaldo Arenas, the late Cuban poet he portrayed in \"Before Night Falls\". <br /><br />In the supporting roles, Belen Rueda, makes an impressive appearance as Julia, the woman fighting her own physical problems. Lola Duenas is also effective as Rosa, the kindred soul that loves Ramon deeply. Celso Bugallo, as Ramon's brother shows a man at a crossroads of his own life. 
Mabel Rivera makes a compassionate Manuela, the sister-in-law that never asks anything of life, but tends to Ramon without questioning why she has to do it, at all.<br /><br />Mr. Amenabar also has composed the haunting music score for the film. He is a man that never cease to surprise. One wonders what his next project will be, but one wishes him success in whatever he might decide to do in the future."
##
## Top 3 comments for topic 4 :
## [1] "I wonder sometimes if maybe Meryl Streep has become so accepted as the most impressive, versatile actress since, well, maybe just about the beginning of the sound era that maybe her talent is now taken for granted. There are probably about three tics that she relies on consistently throughout her performances (most noticeably a pinched lip), but other than that, her performances are amazingly variable and original and fresh expressions of internal workings. Even though \"Sophie's Choice\" and \"A Cry in the Dark\" and \"Silkwood\" may be showier, her work here and in \"Bridges in Madison County\" is remarkable, too - just more subtle. In \"One True Thing\", she is mostly sweet and in love with her domesticated life, and Streep makes what could be routine, even boring, seem attractive and charming. I think that she must work out these mini-theses for each character and find what things make this person real and interesting. She works from the inside out with each character, and maybe it's this essential quality that has evoked the main criticism of aloofness or self-consciousness. I don't think she's cold at all, but instead has thought out her character's unique qualities. I think her critics are confusing self-consciousness with intelligence. Not too many other actors would be so complexly thoughtful and creative as to make Kate Muldrun lightweight and carefree within her beloved, homey environment, only to later reveal unprecedented depth because of her genuine attachment to that homey environment. Her performances are of an unsurpassed consistency, she rarely does anything wrong. I wonder if it's conceivable for any of Streep's pictures to not seem diminished because of her presence... Anyway, Kate loves her home, and her affection for her \"family life\" is as endearing as her new conflict within the home is jarring. 
When all of this comes together, and Kate starts to recognize that she can no longer function in the same capacity, and she breaks the pie dish and screams out that she is not handicapped, it is painfully sad to watch because this has not been someone prone to emotion. Streep is smart enough and generous enough to recognize how much better everything works because she has felt out the dramatic validity of Kate and it's really the only scene when she allows her character to go. But how refreshingly true it is to see a character who can really surprise you by displaying something that you wouldn't have thought possible. Once again, Streep's character has at least three dimensions... God, this sounds like a thesis itself, but as an actress, Streep just has a special kind of intelligence, incredible empathy and great expressive skills. The movie itself is probably somewhat mediocre. I suppose William Hurt is meant to be an unlikeable jerk, and he does pompousness very well. I think Hurt is really kind of creepy, though. Script is quite standard - another tribute to Streep that she was as touching and believable as she was."
## [2] "Imagine a world, in which everyone treats anyone nicely, no foul word is ever uttered, office bickering is nonexistent, and your boss invites the office crowd regularly to self-cooked dinners where you can chat about latest interior design styles. Everything is neat, pleasant - well, just nice. In other words: you are in hell. After being dropped off in the middle of nowhere, mid-thirties Andreas (Trond Fausa Aurvaag) starts a new job as a book-keeper in a small, clean city. From the beginning he feels foreign in this proper, impersonal world of superficial kindness, surrounded by pleasant but lifeless interior architecture and likewise colleagues. Food tastes of nothing, drinks don't get you drunk, no children anywhere; after initial steps of fitting in, Andreas searches for ways to escape the bland new world. He doesn't know where he came from anymore, but still remembers rich tastes, true feelings - anything beyond the non-committal flatline life he's leading now. THE BOTHERSOME MAN resonates ideas of Huxley and Kafka, but here the cruelty is the omnipresent noncommittal neatness. Unlike PLEASANTVILLE this is not about narrow-minded bigotry, more a fable of our urban free-world civilisation of fitting in. It mostly reminds one of the ingenious FIGHT CLUB scene, in which Edward Norton walks through a mock-IKEA catalogue. Spiced with macabre humour, this Scandinavian laconic tale convinces on every level: story, characters, and relevance. A true screen gem. 8/10"
## [3] "I have only see three episodes of Hack, starring David Morse, and it looks as though I've missed 37 episodes! well thats if ITV 3 are showing them in chronological order. I've just watched 'Misty Blue' (episode 38). I have really enjoyed the 3 episodes, but then I'm a David Morse fan, (esp St. Elsewhere). For any one reading this, Hack is excellent. Pity its being aired on ITV 3. The cast is strong, though I cant get used to the idea of David playing a bent cop, still we all know he's the good guy wrongly accused. I see Gary Cole has guest starred, what ever happened to 'Midnight Caller'? Just wonder if Hack is available on DVD (yet). Lovin it. Cheers."
##
## Top 3 comments for topic 5 :
## [1] "I love military comedies (Sgt. Bilko, Stripes, In The Army Now, Major Payne) and Down Periscope is hilarious, but it has a heart as well.<br /><br />The Stingray SS-161 (The USS Pampanito) was gorgeous. Absolutely beautiful, a piece of art come alive. So it was a diesel engine sub, so what? I learned that the Aircraft Carrier USS Ranger (which stood in for The Enterprise in Star Trek IV), a huge ship, was 'conventionally powered', which might mean that Ranger was a diesel too.<br /><br />My favorite scene: Pascal: Jesus, Buckman, this can's been on the stingray since Korea! This can expired in 1966! Buckman: (Takes finger full and tastes it) What's the matter, sir? It still tastes like creamed corn.<br /><br />Pascal: (Yelling) Except, it's DEVILED HAM!! Buckman: That would be a problem.<br /><br />It's story, perhaps a wee strained, seemed plausible. Winslow respected Dodge, and seemed to care about him, so he wanted to give Dodge a chance. He gave him a battered but still seaworthy Balao-class sub, and assigns him the task of using the diesel sub to evade the nuclear Navy and 'attack' Charlston Harbor, and Norfolk. 2-star Admiral Graham (with his eye on his third star, and a grudge against Dodge) assigned him the ragtag crew, hoping that they would screw up so Dodge would lose. Can Tom Dodge get the crew up to speed and working as a team, and can he take an old, out of date sub, and beat the Navy's best?"
## [2] "A respectable royal rumble event<br /><br />1. Edge Vs Shawn Michaels<br /><br />7.5/10 A very strong opener...edge's heel performance was sublime as it was during the rumble event ..overall id say EDGES NIGHT....<br /><br />2. Undertaker Vs Heidenreich CASKET MATCH<br /><br />7/10 a lot of people hated this rivalry though i liked it, i thought heidenreich really played his character well. the match wasn't amazing in excitement that was until kane and snitcky get involved .it gets better as it goes on.<br /><br />3. Kurt Angle Vs Big Show Vs JBL(WWE CHAMP) WWE TITLE MATCH<br /><br />7.5/10 a surprisingly good match , as there was only 1 really exciting in ringer in it ..angle of course......very good title match ..good pace.. though a predictable end .but aren't all royal rumble title matches predictable.<br /><br />4. Triple H (WORLD TITLE) Vs Randy Orton WORLD TITLE MATCH<br /><br />6.5/10 actually not that good for the guys involved, went on too many dry patches, orton sold his concussion amazingly, ending though was some what of an anti climax.<br /><br />5. ROYAL RUMBLE EVENT... btw during the other matches there were a few segments...two which were really cool ...cena rapping on Christian and guerrero stealing flairs number 30 entry ticket. the event was good 7/10 would have got a 9 if cena won but unfortunately my biggest enemy batista wins(THOUGH I UNDERSTooD WHY) ..vince comes down and takes a drop and the whole arena crack up in laughter."
## [3] "Years before pre-nuptial agreements became a regular thing, Ernest Lubitsch made a screen comedy on which they are the basis. Bluebeard's Eighth Wife involves Gary Cooper as a multi-millionaire living on the French Riviera who's been married seven times and now marries Claudette Colbert for number eight. But Cooper's a good sport about it, he always settles with his ex-wives for a $50,000.00 a year as per an agreement they sign before marrying him. Sounds like what we now call a pre-nuptial agreement.<br /><br />Of course Claudette wants a lot more than that and she feels Cooper takes an entirely too business like approach to marriage. She'd like the real deal and is willing to go some considerable lengths to get it.<br /><br />Bluebeard's Eighth Wife has some really funny moments, the original meeting of Cooper and Colbert in a men's store where Cooper is insisting he wants only pajama tops and Colbert looking for only bottoms. And of course my favorite is Colbert trailing and blackmailing the detective Cooper sends to spy on her. Herman Bing has the best supporting role in the film as that selfsame, flustered detective.<br /><br />I've often wondered how back in the day Hollywood could get away with casting so many people who are non-French in a film like this. Of course Cooper is an American and Colbert of the cast is the only one actually of French background. Though David Niven is charming as always, having him be a Frenchman is ludicrous, he is sooooooo British.<br /><br />Nevertheless Bluebeard's Eighth Wife is an enjoyable film and a great example of what was called 'the Lubitsch touch' back in the day."
##
## Top 3 comments for topic 6 :
## [1] "Riding high on the success of \"Rebel Without a Cause\", came a tidal wave of 'teen' movies. Arguably this is one of the best. A very young McArthur excels here as the not really too troubled teen. The story concentrates more on perceptions of delinquency, than any traumatic occurrence. The supporting cast is memorable, Frankenheimer directs like an old pro. Just a story of a young man that finds others take his actions much too seriously."
## [2] "I first saw this film in the late 60's, and try to see it every time it comes on TV, which, unfortunately, isn't often. Now that I have TCM and FMC, I hope it will be on at least once a year. Like Louis Gossett Jr. in An Officer and A Gentleman, Jack Webb delivers an unbelievably great performance as a Parris Island Drill Sergeant with the classic screw-up recruit, and the story line in this one, though dated, is touching and very well acted. And having real jar-heads in the cast certainly helped in the realism of the film as well. It's a great film with top-notch acting and a superb story. See it if you have the chance-It's well worth the time!"
## [3] "Well, it's safe to say that Subconscious Cruelty is one weird film! Supposedly an insight into the human mind, Subconscious Cruelty is comprises four macabre and bizarre tales of the extreme. The first segment, entitled 'Ovarian Eyeball' is really just a warm up, but it's good in that it gives the viewer an idea of what to expect from the next three segments. It simply sees a naked woman laid out on a table, while another woman cuts into her abdomen and pulls out a human eyeball! I've got no idea what the point is, but it certainly makes for visceral viewing. The following story is the best of the bunch, and takes in the \"old favourite\" sick movie theme of incest. The segment follows a man who lives alone with his pregnant sister. He's repulsed by her pregnancy - yet he wants to have sex with her anyway, and naturally he gets his way. This story stands out because of the monotonous and 'matter of fact' narration, as well as the ending - which doesn't fail to deliver the shocks. This segment is well acted, well filmed and easily the highlight of Subconscious Cruelty.<br /><br />Naturally, the next two sections aren't as good as the second one; so the only way from there is down, but director Karim Hussain still manages to pull something out of the bag before the film ends. He doesn't do it right away, however, as the third segment is the weakest of the film, and simply sees a lot of people have sex with the ground. It's very surreal, and therefore memorable for that same reason; but there doesn't seem to be a lot of point to it, and I was in the mood for something a bit more morbid after the second section. The film ends on a high, however, as while I'm not entirely sure what the point was - the final segment features the film's best imagery. This segment focuses on religion, and certainly isn't for anyone that values it! 
Director Karim Hussain has achieved something here - as while this collection of four 'extreme' stories doesn't come together as a complete whole, the film almost feels tasteful as it's shot in such an eloquent and eye catching manner. The director would seem to have been imitating the highly respected surreal director Luis Buñuel, albeit with gore, rape and incest; and if you ask me, he hasn't done a bad job at all. Not for everyone, but certainly worth a look for extreme fanatics!"
Interactive visualization of topic clusters using plotly.
# Step 1: give every table an explicit 'ID' key column built from its row names
topics2 <- topics3
rownames(topics2) <- data$doc_id
add_id <- function(df) {
  df$ID <- rownames(df)
  df
}
main_topic3 <- add_id(main_topic3)
topics2 <- add_id(topics2)
plot_data <- add_id(plot_data)
# Step 2: merge all three tables on 'ID'; merge() only takes two tables at a
# time, so fold them pairwise (same order as before, keeping the .x/.y suffixes)
merged_data <- Reduce(
  function(a, b) merge(a, b, by = "ID"),
  list(plot_data, topics2, main_topic3)
)
# Step 3: treat the main topic as categorical for plotting
merged_data$Main_Topic.x <- as.factor(merged_data$Main_Topic.x)
merged_data
## ID Coordinate1 Coordinate2 Main_Topic.x 1 2
## 1 10022_10.txt -0.641390661 -0.937548345 5 0.004528658 0.003268495
## 2 10068_8.txt 0.677913161 -0.911336729 3 0.004027181 0.003721005
## 3 10202_9.txt 0.682331277 -0.909852470 3 0.003592651 0.004010456
## 4 10383_8.txt -0.025580020 0.006779017 6 0.076260529 0.065637458
## 5 10384_7.txt 1.015871854 0.440743774 4 0.003734041 0.003765360
## 6 10402_8.txt -0.252888928 1.077589980 1 0.974952149 0.004295069
## 7 10591_10.txt -1.133468574 0.194583716 2 0.003971297 0.974646766
## 8 10615_8.txt 1.014635679 0.444618015 4 0.003668067 0.003538522
## 9 10828_9.txt -0.636985823 -0.923031478 5 0.004104466 0.003490408
## 10 10923_7.txt -1.137203130 0.192073121 2 0.004022357 0.975493925
## 11 11179_10.txt -0.250972274 1.075128203 1 0.975432062 0.003733722
## 12 11278_10.txt 1.014618278 0.443660081 4 0.003711816 0.003384045
## 13 11477_10.txt -0.002661904 0.029649445 6 0.076625639 0.063145685
## 14 11496_8.txt 1.015628209 0.441862902 4 0.003376696 0.004033797
## 15 11590_10.txt 0.682090252 -0.909167491 3 0.003554483 0.004036703
## 16 11616_7.txt -0.032618839 0.015532426 6 0.068623959 0.073364747
## 17 11673_10.txt -1.139193015 0.192580599 2 0.003671823 0.975317154
## 18 11719_9.txt 0.680034350 -0.909458457 3 0.003641381 0.004030541
## 19 1180_7.txt -0.258120692 1.080819277 1 0.974618998 0.003462129
## 20 11814_10.txt -0.642436700 -0.926002538 5 0.003889198 0.004108871
## 21 11819_10.txt -0.252318577 1.075906415 1 0.975006668 0.003937032
## 22 11879_10.txt 1.014996624 0.444473597 4 0.004563596 0.003284910
## 23 11948_10.txt -1.137952268 0.194366625 2 0.004253052 0.975093939
## 24 12163_7.txt -0.252547295 1.074087587 1 0.975457999 0.003980400
## 25 12216_10.txt -0.639709901 -0.926193594 5 0.003600656 0.003535744
## 26 12256_9.txt -0.638360762 -0.922930564 5 0.003878342 0.003203179
## 27 12314_10.txt 0.678053066 -0.909667816 3 0.003939854 0.003800533
## 28 1246_9.txt 0.677577766 -0.911426274 3 0.004099916 0.003684199
## 29 1519_9.txt 1.018521592 0.440783974 4 0.003792247 0.003600870
## 30 1527_10.txt -0.262255864 1.082933483 1 0.974355464 0.003382408
## 31 1686_10.txt -0.260554560 1.079711530 1 0.975061543 0.003633374
## 32 1717_10.txt 0.678150375 -0.908599685 3 0.003702056 0.004316915
## 33 1755_10.txt -0.255775264 1.080591233 1 0.974671282 0.003328190
## 34 1860_10.txt 0.067947228 -0.018707736 6 0.064977259 0.066584764
## 35 2161_7.txt 1.016583317 0.439006118 4 0.003508989 0.004722102
## 36 2180_8.txt 1.016622341 0.438966195 4 0.003589898 0.003514805
## 37 2244_7.txt 0.680671479 -0.909409151 3 0.003727227 0.003670170
## 38 2293_8.txt 1.014490791 0.442862420 4 0.003375201 0.003770505
## 39 2329_10.txt -0.631156510 -0.909244614 5 0.003800033 0.003687040
## 40 2652_7.txt -0.253510439 1.077124929 1 0.975078093 0.003487783
## 41 2817_10.txt 1.015711015 0.441638417 4 0.003825840 0.003916312
## 42 2830_9.txt -1.139245783 0.192664309 2 0.003821506 0.975451132
## 43 2953_10.txt -1.139601125 0.192932599 2 0.003732614 0.975448223
## 44 3095_10.txt 0.677383006 -0.908037466 3 0.003854936 0.003667709
## 45 3249_9.txt 0.676498171 -0.909486960 3 0.004003020 0.003698205
## 46 3369_7.txt 0.677983370 -0.908945174 3 0.004286975 0.003410217
## 47 3410_10.txt -1.138783437 0.194629846 2 0.003902675 0.975179568
## 48 3417_9.txt -0.250824950 1.074603465 1 0.975435387 0.004092752
## 49 3579_8.txt -0.639981485 -0.928085809 5 0.003698182 0.003959563
## 50 3816_7.txt -0.636935043 -0.916273341 5 0.003936403 0.003983550
## 51 4033_8.txt 0.679494834 -0.911069332 3 0.003842040 0.003826513
## 52 41_9.txt 1.014885149 0.439433448 4 0.003641923 0.003525541
## 53 4150_7.txt -0.639686157 -0.924927268 5 0.003728700 0.003290359
## 54 427_10.txt 1.018514470 0.440518097 4 0.003736326 0.003503171
## 55 4541_9.txt -0.064092534 0.025451587 6 0.094635900 0.067667756
## 56 4818_10.txt -0.003862475 -0.025259907 6 0.070954674 0.067302978
## 57 5177_8.txt 0.685113696 -0.910790665 3 0.003941031 0.003818988
## 58 5301_9.txt 0.679356541 -0.910895682 3 0.003877325 0.003678134
## 59 5444_10.txt -0.640453374 -0.927824524 5 0.003553142 0.003980205
## 60 5564_10.txt -0.253329976 1.073970516 1 0.975369756 0.003874428
## 61 558_10.txt -1.141847595 0.195061576 2 0.003537927 0.975306538
## 62 5695_10.txt -0.639820204 -0.929885801 5 0.004133521 0.003865442
## 63 5849_7.txt -0.252738845 1.078870986 1 0.974888822 0.003974443
## 64 5935_7.txt -0.629733418 -0.911504802 5 0.004052968 0.003580363
## 65 6055_10.txt -0.018422252 0.018055421 6 0.073516049 0.063835116
## 66 6080_9.txt 1.014389680 0.439411072 4 0.003693742 0.003633427
## 67 616_10.txt 1.014882573 0.442695231 4 0.004328068 0.003424443
## 68 6162_10.txt -0.098647078 0.228242622 1 0.520721232 0.054336992
## 69 6186_7.txt -0.632417665 -0.911990063 5 0.003761790 0.003789042
## 70 6260_9.txt 0.683304591 -0.909463975 3 0.003646264 0.003564507
## 71 6581_10.txt -1.139128135 0.193631928 2 0.003848981 0.975168308
## 72 6622_9.txt -1.138627407 0.193477883 2 0.003735392 0.975149694
## 73 670_7.txt -1.141824929 0.192480057 2 0.003857934 0.975605971
## 74 6723_7.txt -0.262271037 1.081522839 1 0.974995415 0.003556908
## 75 7091_7.txt 1.019093584 0.442785802 4 0.003743125 0.003501878
## 76 7162_9.txt -0.635801633 -0.918325113 5 0.003854566 0.003574883
## 77 731_8.txt -1.142020561 0.195353262 2 0.003679053 0.975385699
## 78 7354_8.txt -0.254902612 1.075640169 1 0.975283858 0.003562946
## 79 7458_10.txt -0.639503123 -0.929851292 5 0.004291596 0.003362500
## 80 7459_10.txt 0.678226988 -0.911417802 3 0.003960101 0.003801618
## 81 7500_10.txt -1.141390270 0.194196046 2 0.003755468 0.975538746
## 82 7639_7.txt -0.258434735 1.080264533 1 0.974982989 0.003503544
## 83 7692_10.txt -1.137905449 0.193158567 2 0.004096784 0.975253629
## 84 7712_7.txt -0.640457210 -0.929419868 5 0.003837329 0.003943034
## 85 7888_10.txt 1.019837183 0.444938214 4 0.003668127 0.003735686
## 86 7937_10.txt 1.016648427 0.443544810 4 0.004249970 0.003518712
## 87 8040_8.txt -0.255557983 1.080171945 1 0.974488670 0.003336214
## 88 8067_8.txt 1.019761681 0.445445967 4 0.004133492 0.003756597
## 89 8197_8.txt 1.016713850 0.440211787 4 0.004000860 0.003548698
## 90 8236_8.txt -0.251415543 1.074242541 1 0.975389175 0.003345053
## 91 8344_7.txt 0.679648460 -0.910909653 3 0.003915299 0.003656570
## 92 8677_8.txt 1.018855331 0.440804815 4 0.003861470 0.003554488
## 93 8695_8.txt 0.019115446 0.063056204 6 0.091905307 0.065802247
## 94 9035_9.txt 0.678173858 -0.907515384 3 0.003694454 0.003711353
## 95 9330_9.txt -0.639593823 -0.927068974 5 0.004177331 0.003286211
## 96 9342_8.txt -0.257425285 1.077560244 1 0.975244771 0.003553794
## 97 9454_9.txt -0.255018498 1.078246749 1 0.974241011 0.003314400
## 98 9577_10.txt 1.019455547 0.441380742 4 0.003917307 0.003942387
## 99 966_10.txt -1.140428404 0.195424847 2 0.004065059 0.975381469
## 100 9665_10.txt -0.639993059 -0.930628012 5 0.004043557 0.003572743
## 3 4 5 6 Main_Topic.y
## 1 0.003778799 0.003795489 0.975076978 0.009551582 5
## 2 0.975492019 0.003757728 0.003628485 0.009373583 3
## 3 0.975382142 0.003840334 0.003644097 0.009530319 3
## 4 0.073106999 0.066789612 0.067338271 0.650867131 6
## 5 0.003906618 0.975100340 0.003509989 0.009983651 4
## 6 0.003696658 0.003322373 0.003861412 0.009872339 1
## 7 0.003809618 0.003568024 0.003535955 0.010468339 2
## 8 0.004365328 0.974391128 0.003402492 0.010634462 4
## 9 0.003815045 0.003742698 0.975428874 0.009418509 5
## 10 0.003599712 0.003922359 0.003647472 0.009314174 2
## 11 0.003875173 0.003698497 0.003802654 0.009457892 1
## 12 0.004397419 0.974545427 0.003517356 0.010443936 4
## 13 0.067792476 0.079730174 0.063202834 0.649503193 6
## 14 0.003941775 0.975156578 0.003723458 0.009767696 4
## 15 0.975449735 0.003780952 0.003760277 0.009417850 3
## 16 0.063471975 0.071858734 0.071667708 0.651012877 6
## 17 0.003722767 0.004098842 0.003582080 0.009607334 2
## 18 0.975187508 0.003723256 0.003597928 0.009819387 3
## 19 0.004130447 0.003633550 0.003713699 0.010441177 1
## 20 0.003898550 0.003414355 0.975481847 0.009207179 5
## 21 0.003370306 0.003646812 0.004228759 0.009810423 1
## 22 0.003873484 0.974554220 0.003664991 0.010058798 4
## 23 0.003967387 0.003603135 0.003374089 0.009708398 2
## 24 0.003506609 0.003902208 0.003818676 0.009334109 1
## 25 0.003848027 0.003925668 0.974981147 0.010108757 5
## 26 0.004122955 0.003878225 0.974949170 0.009968128 5
## 27 0.975229251 0.003780075 0.003540496 0.009709791 3
## 28 0.975151453 0.003758523 0.003525020 0.009780889 3
## 29 0.004271536 0.975308432 0.003573919 0.009452996 4
## 30 0.003927853 0.003847565 0.003718610 0.010768100 1
## 31 0.004104368 0.004052356 0.003359607 0.009788753 1
## 32 0.974926182 0.003749386 0.003332571 0.009972889 3
## 33 0.004182504 0.003662842 0.003893490 0.010261691 1
## 34 0.079040486 0.078539345 0.061503707 0.649354438 6
## 35 0.003560160 0.974973264 0.003541701 0.009693785 4
## 36 0.003716687 0.975130849 0.004453535 0.009594227 4
## 37 0.975334704 0.003840097 0.003745516 0.009682286 3
## 38 0.003836016 0.974755147 0.004175553 0.010087577 4
## 39 0.003837790 0.003921581 0.975645677 0.009107879 5
## 40 0.004387709 0.003607595 0.003616630 0.009822189 1
## 41 0.003570716 0.975187633 0.003638247 0.009861253 4
## 42 0.003867311 0.003853246 0.003538496 0.009468309 2
## 43 0.003986207 0.003837260 0.003534760 0.009460935 2
## 44 0.975041833 0.003709223 0.003679598 0.010046703 3
## 45 0.974917637 0.003488803 0.003813249 0.010079086 3
## 46 0.974964017 0.004215917 0.003337600 0.009785274 3
## 47 0.004511134 0.003586215 0.003350033 0.009470376 2
## 48 0.003839033 0.003675424 0.003561669 0.009395735 1
## 49 0.005173420 0.002952259 0.974402549 0.009814026 5
## 50 0.003493961 0.003776716 0.975522237 0.009287132 5
## 51 0.975423231 0.003835016 0.003602638 0.009470562 3
## 52 0.003960483 0.974942185 0.003745867 0.010184000 4
## 53 0.004042806 0.003955335 0.974939180 0.010043619 5
## 54 0.003962870 0.975373474 0.003959924 0.009464235 4
## 55 0.067222474 0.084181404 0.081005034 0.605287433 6
## 56 0.075634660 0.068503236 0.066385687 0.651218764 6
## 57 0.975441354 0.004046586 0.003418967 0.009333073 3
## 58 0.975406702 0.003663152 0.003834158 0.009540528 3
## 59 0.003872738 0.003771942 0.975252760 0.009569212 5
## 60 0.003725486 0.003713240 0.003697480 0.009619611 1
## 61 0.004462302 0.003532845 0.003795760 0.009364628 2
## 62 0.003742137 0.003379954 0.975217948 0.009660998 5
## 63 0.003577608 0.003402720 0.004246517 0.009909889 1
## 64 0.003812728 0.003781685 0.975586311 0.009185945 5
## 65 0.067514954 0.071546986 0.072440316 0.651146578 6
## 66 0.004071044 0.975153107 0.003591034 0.009857646 4
## 67 0.003944545 0.974837859 0.003387340 0.010077745 4
## 68 0.039951515 0.033949404 0.034165719 0.316875137 1
## 69 0.003710912 0.003906069 0.975580938 0.009251249 5
## 70 0.974231105 0.004480121 0.003431186 0.010646816 3
## 71 0.003891454 0.003799920 0.003595701 0.009695636 2
## 72 0.003632456 0.003896137 0.003660581 0.009925740 2
## 73 0.003716418 0.003820232 0.003830659 0.009168787 2
## 74 0.003665131 0.004082774 0.003628154 0.010071618 1
## 75 0.004025505 0.975475588 0.003931354 0.009322550 4
## 76 0.003718669 0.003941778 0.975445571 0.009464533 5
## 77 0.003886305 0.003499464 0.004059008 0.009490471 2
## 78 0.003873830 0.003797535 0.003807645 0.009674185 1
## 79 0.003551099 0.003864906 0.974968363 0.009961537 5
## 80 0.975407758 0.003619697 0.003676643 0.009534185 3
## 81 0.004242543 0.003630164 0.003655162 0.009177917 2
## 82 0.004464193 0.003922550 0.003362599 0.009764126 1
## 83 0.003775678 0.003819182 0.003410659 0.009644067 2
## 84 0.003682348 0.003384914 0.974654190 0.010498184 5
## 85 0.004528309 0.975226973 0.003374491 0.009466415 4
## 86 0.004083931 0.975173822 0.003410726 0.009562839 4
## 87 0.005295387 0.003352110 0.003658348 0.009869270 1
## 88 0.004320162 0.975152952 0.003174900 0.009461897 4
## 89 0.003443500 0.975207282 0.004096538 0.009703122 4
## 90 0.004186871 0.003873956 0.003842072 0.009362872 1
## 91 0.975371316 0.003976159 0.003516944 0.009563711 3
## 92 0.003967187 0.975438963 0.003698826 0.009479066 4
## 93 0.070370105 0.066530390 0.058646640 0.646745311 6
## 94 0.975614821 0.004025421 0.003837260 0.009116691 3
## 95 0.004140765 0.003745353 0.974336465 0.010313875 5
## 96 0.003756440 0.003991044 0.003716658 0.009737292 1
## 97 0.003445024 0.003687144 0.005184017 0.010128404 1
## 98 0.003656528 0.975361079 0.003566889 0.009555811 4
## 99 0.003611046 0.003498412 0.004002991 0.009441023 2
## 100 0.003812090 0.003581393 0.975013379 0.009976838 5
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Build the hover tooltip: the per-document probability of each of the 6 topics
merged_data$tooltip <- paste0(
  "1: ", merged_data$`1`, "<br>",
  "2: ", merged_data$`2`, "<br>",
  "3: ", merged_data$`3`, "<br>",
  "4: ", merged_data$`4`, "<br>",
  "5: ", merged_data$`5`, "<br>",
  "6: ", merged_data$`6`, "<br>"
)
# Use the text aesthetic to pass labels for tooltips and text.
# Fixes: label points with merged_data$ID (merge() re-sorts rows by ID, so
# row.names(plot_data) could mismatch the points), and use a discrete colour
# scale -- the points are mapped to `color`, so scale_fill_viridis() had no effect.
p1 <- ggplot(data = merged_data, aes(x = Coordinate1, y = Coordinate2, text = tooltip, color = Main_Topic.x, label = ID)) +
  scale_color_viridis(discrete = TRUE) +
  geom_point(size = 2, shape = 2) + # Points
  geom_text(aes(label = ID), vjust = -5, size = 3) + # Text labels
  theme_minimal()
# Convert to a plotly interactive plot, showing only the tooltip text on hover
interactive_plot <- ggplotly(p1, tooltip = "text")
# Display the interactive plot
interactive_plot
We are creating 3 visualizations using 3 lexicons, as introduced in the Text Mining with R reading (Silge and Robinson, 2017): AFINN from Finn Årup Nielsen, bing from Bing Liu and collaborators, and nrc from Saif Mohammad and Peter Turney.
First, get the main topic (the topic with the maximum beta) for each word.
# For each term, keep only the topic(s) where its beta is highest,
# then order the result by topic number.
top_topics <- tidy_topics |>
  group_by(term) |>
  filter(beta == max(beta)) |>
  arrange(topic)
top_topics
## # A tibble: 2,054 × 3
## # Groups: term [2,054]
## topic term beta
## <int> <chr> <dbl>
## 1 1 accent 0.00119
## 2 1 actionspi 0.00119
## 3 1 activistbr 0.00119
## 4 1 advertis 0.00238
## 5 1 african 0.00358
## 6 1 againbr 0.00119
## 7 1 againi 0.00119
## 8 1 age 0.00239
## 9 1 ago 0.00119
## 10 1 aidan 0.00238
## # ℹ 2,044 more rows
Using the ‘afinn’ lexicon to give each word a sentiment score (-5 to 5), and visualize the distribution of sentiment scores for each topic.
# install.packages("textdata")
library(SnowballC) # for stemming the words in the lexicon
# Stem the AFINN lexicon so its words match the stemmed document terms.
# distinct() drops the duplicate rows stemming creates (e.g. "abandoned" and
# "abandons" both stem to "abandon"), which would otherwise multiply matches
# in the later inner_join and inflate the sentiment counts.
affin_stem <- get_sentiments("afinn") %>%
  mutate(word = wordStem(word)) %>%
  distinct()
affin_stem
## # A tibble: 2,477 × 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandon -2
## 3 abandon -2
## 4 abduct -2
## 5 abduct -2
## 6 abduct -2
## 7 abhor -3
## 8 abhor -3
## 9 abhorr -3
## 10 abhor -3
## # ℹ 2,467 more rows
# Attach an AFINN score (-5..5) to each topic's top words.
# The join key is spelled out explicitly instead of relying on the
# implicit "Joining with `by = join_by(word)`" behaviour.
senti_afinn <- top_topics %>%
  mutate(word = term) %>%
  inner_join(affin_stem, by = "word") %>%
  mutate(topic = as.factor(topic))
senti_afinn
## # A tibble: 507 × 5
## # Groups: term [268]
## topic term beta word value
## <fct> <chr> <dbl> <chr> <dbl>
## 1 1 assassin 0.00119 assassin -3
## 2 1 assassin 0.00119 assassin -3
## 3 1 attract 0.00239 attract 1
## 4 1 attract 0.00239 attract 1
## 5 1 attract 0.00239 attract 2
## 6 1 attract 0.00239 attract 2
## 7 1 attract 0.00239 attract 2
## 8 1 attract 0.00239 attract 1
## 9 1 await 0.00119 await -1
## 10 1 await 0.00119 await -1
## # ℹ 497 more rows
# Violin + boxplot of AFINN sentiment scores, one distribution per topic
plot_afinn <- senti_afinn |>
  ggplot(aes(x = topic, y = value, fill = topic)) +
  geom_violin(alpha = 0.7) +
  geom_boxplot(width = 0.1, alpha = 0.5) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    title = "Distribution of Sentiment Value by Topic",
    x = "Topic",
    y = "Value"
  ) +
  theme_minimal()
ggplotly(plot_afinn)
Using the ‘bing’ lexicon to label each word ‘positive’ or ‘negative’, and visualize the number of positive and negative words in each topic.
# Stem the bing lexicon so its words match the stemmed document terms.
# distinct() drops the duplicate rows stemming creates (e.g. several
# "abomin..." entries collapse to one), which would otherwise multiply
# matches in the later inner_join and inflate the positive/negative counts.
bing_stem <- get_sentiments("bing") %>%
  mutate(word = wordStem(word)) %>%
  distinct()
bing_stem
## # A tibble: 6,786 × 2
## word sentiment
## <chr> <chr>
## 1 2-face negative
## 2 abnorm negative
## 3 abolish negative
## 4 abomin negative
## 5 abomin negative
## 6 abomin negative
## 7 abomin negative
## 8 abort negative
## 9 abort negative
## 10 abort negative
## # ℹ 6,776 more rows
# Count positive and negative lexicon words per topic.
# The join key is explicit instead of relying on the implicit-join message.
senti_bing <- top_topics %>%
  mutate(word = term) %>%
  inner_join(bing_stem, by = "word") %>%
  mutate(topic = as.factor(topic)) %>%
  group_by(topic, sentiment) %>%
  summarize(count = n(), .groups = "drop")
senti_bing
## # A tibble: 12 × 3
## topic sentiment count
## <fct> <chr> <int>
## 1 2 negative 97
## 2 2 positive 61
## 3 3 negative 79
## 4 3 positive 65
## 5 5 negative 79
## 6 5 positive 47
## 7 6 negative 51
## 8 6 positive 30
## 9 4 negative 101
## 10 4 positive 73
## 11 1 negative 82
## 12 1 positive 56
# Side-by-side bars of positive vs negative word counts per topic
# (geom_col is geom_bar(stat = "identity"))
plot_bing <- senti_bing |>
  ggplot(aes(x = topic, y = count, fill = sentiment)) +
  geom_col(position = "dodge") +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    title = "Positive vs Negative Sentiment Counts by Topic",
    x = "Topic",
    y = "Count",
    fill = "Sentiment"
  ) +
  theme_minimal()
ggplotly(plot_bing)
Using the ‘nrc’ lexicon to label each word with an emotion, like anger, anticipation, fear, joy, etc., and visualize the number of words for each emotion in each topic. I color the negative emotions blue and the positive emotions red, and the shades of color represent the degree of emotion. For example, anger, disgust, fear, sadness, and negative are colored from deep blue to light blue.
# Stem the NRC emotion lexicon so its words match the stemmed document terms.
# distinct() drops the duplicate (word, sentiment) rows stemming creates
# (visible as repeated "abandon fear" rows), which would otherwise multiply
# matches in the later inner_join and inflate the emotion counts.
nrc_stem <- get_sentiments("nrc") %>%
  mutate(word = wordStem(word)) %>%
  distinct()
nrc_stem
## # A tibble: 13,872 × 2
## word sentiment
## <chr> <chr>
## 1 abacu trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandon anger
## 6 abandon fear
## 7 abandon negative
## 8 abandon sadness
## 9 abandon anger
## 10 abandon fear
## # ℹ 13,862 more rows
# Count NRC emotion words per topic.
# Fixes: the topic -> factor conversion now happens BEFORE group_by (the
# original mutated a grouping column), matching the senti_bing pipeline,
# and the join key is explicit instead of relying on the implicit-join message.
senti_nrc <- top_topics %>%
  mutate(word = term) %>%
  inner_join(nrc_stem, by = "word") %>%
  mutate(topic = as.factor(topic)) %>%
  group_by(topic, sentiment) %>%
  summarize(count = n(), .groups = "drop")
senti_nrc
## # A tibble: 60 × 3
## topic sentiment count
## <fct> <chr> <int>
## 1 1 anger 34
## 2 1 anticipation 32
## 3 1 disgust 17
## 4 1 fear 35
## 5 1 joy 23
## 6 1 negative 55
## 7 1 positive 55
## 8 1 sadness 17
## 9 1 surprise 16
## 10 1 trust 35
## # ℹ 50 more rows
# Order the emotions from most negative to most positive
emotion_levels <- c(
  "anger", "disgust", "fear", "sadness", "negative",
  "positive", "joy", "trust", "anticipation", "surprise"
)
senti_nrc$sentiment <- factor(senti_nrc$sentiment, levels = emotion_levels)
# Generate blue and red gradients: negative emotions run deep -> light blue,
# positive emotions run light -> deep red
negative_blues <- c("#08306B", "#08519C", "#2171B5", "#4292C6", "#6BAED6")
positive_reds <- c("#FC9272", "#FB6A4A", "#EF3B2C", "#CB181D", "#99000D")
color_palette <- setNames(
  c(negative_blues, positive_reds),
  c(
    "anger", "disgust", "fear", "sadness", "negative",
    "positive", "joy", "trust", "anticipation", "surprise"
  )
)
# Plot the graph: dodged bars of emotion counts per topic, coloured with
# the manual blue/red gradient (geom_col is geom_bar(stat = "identity"))
plot_nrc <- senti_nrc |>
  ggplot(aes(x = topic, y = count, fill = sentiment)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = color_palette) +
  labs(
    title = "Positive vs Negative Sentiment Counts by Topic",
    x = "Topic",
    y = "Count",
    fill = "Sentiment"
  ) +
  theme_minimal()
ggplotly(plot_nrc)
Course evaluations in undergraduate institutions are vital for professors to understand students’ attitudes and opinions about course structure, instruction, and content. The goal of this study is to help professors better utilize the textual responses in evaluations. By employing topic modeling, we aim to identify key themes that students value in their class experiences, with sentiment analysis providing insight into how students perceive the course’s performance in these areas.
Traditional evaluations, conducted at the end of a course, often benefit future cohorts but fail to address the needs of the current students who provided the feedback. Additionally, future cohorts may have different values and expectations. Therefore, this study also aims to investigate whether collecting weekly comments, rather than a single end-of-course evaluation, could offer a more effective alternative. By doing so, we can also examine how students’ values and attitudes toward a course evolve over time.
Student feedback serves two primary functions in higher education: improving instruction and aiding administrative decision-making (Harvey, 2003). The success of an institution of higher education largely depends on whether students continue enrolling and paying tuition. Student feedback on all aspects of their collegiate experience is necessary for administrators, especially course satisfaction. However, since 2010, the number of students enrolling in postsecondary institutions has dropped, and it decreased dramatically after the COVID-19 pandemic (O’Connell-Domenech, 2024). Because of this, soliciting student feedback has become more critical than ever.
Harvey (2003) outlines four ways to collect student opinions about their courses and professors: informal discussion, formal qualitative sessions, representative committees, or questionnaires. Questionnaires often utilize close-ended questions to make analysis more manageable and efficient, though the scope of student responses will be limited. Literature shows that specific qualitative questions elicit more student comments with a higher average number of words per comment (Williamson & Wang, 2023). Some researchers, wanting more candid student responses with a student-chosen range of topics, choose to have conversations with representatives of students, but this is also time-consuming and may not elicit the most honest responses given the in-person context (Brandl et al., 2017). Thus, researchers are exploring methods to efficiently analyze large numbers of textual responses. In 2023, Aznar-Mas and colleagues collected and qualitatively coded open-ended feedback at an engineering school, leading to a broader view and a larger amount of information which eventually led to teaching improvement. School Board members found the analysis of the qualitative responses to have more advantages and more disadvantages than close-ended feedback. One of the main disadvantages of this type of analysis is that it is labor intensive. However, Marshall (2022) used LDA topic modeling to extract themes from similar data and found that the topics extracted were more specific than those provided by quantitative responses. In addition, about 50% of the comments included actionable components the teacher could improve. Sun and Yan (2023) also used LDA topic modeling to analyze course feedback and found that it revealed topics not covered in more structured questionnaires, including Likert scale questions.
In both Marshall’s and Sun and Yan’s analyses, LDA was able to extract several themes from the qualitative feedback that would likely have been too time-consuming to identify non-computationally. Sun and Yan (2023) found that the comment topics varied according to student demographics and characteristics. Marshall’s work built on the topic modeling step to include sentiment analysis, revealing that certain topics were always linked to positive comments while others were always linked to negative ones.
While the insights and topics uncovered in students’ comments are universally useful for administrative purposes, they are insufficient when attempting to improve instruction. At best, instructors could aim to improve their teaching methods or course structure the next semester, but would only know the impact of their changes after a whole semester had passed. Sozer and colleagues (2019) instead focused on mid-term open-ended feedback to impact university-wide learning and teaching in a more timely manner. Research from Spencer and Schmelkin (2002) indicates that when using student feedback for improvements, it is most effective to communicate the results of surveys to students and outline the planned changes. This student feedback and improvement cycle proved to have positive effects on student satisfaction in Sozer et al.’s research.
Our research team intends to discover whether increasing the frequency of open-ended course feedback to weekly rather than twice a semester will allow for more timely instructional improvements and, in turn, increase student satisfaction and learning outcomes. We will use Correlated Topic Modeling to computationally reveal topics present across students’ comments over the fifteen weeks in a term. Then, we will use Sentiment Analysis to discover how positive or negative comments addressing a certain topic tend to be.
Course evaluations typically include Likert scale questions and open-ended responses. While these evaluations provide valuable insights for professors to improve their courses, the written responses are often overlooked—especially in large classes like introductory Psychology or Economics. Professors may lack the capacity to read and analyze every response, missing out on potentially actionable feedback. With large-scale textual data, topic modeling can help professors identify overarching themes in students’ responses, providing a clearer understanding of their opinions. By incorporating sentiment analysis within each topic, professors can gauge whether students view specific aspects of the course positively or believe improvements are needed.
RQ1: To what extent does using CTM on open-ended student feedback collected weekly reveal patterns in common topics in students’ opinions about the class over time?
RQ2: To what extent does sentiment analysis help identify students’ attitudes toward their course experiences?
We would need a dataset containing weekly textual feedback from students throughout the course. To our knowledge, no public datasets of this nature currently exist. However, professors may have restricted access to evaluation data from their courses, though it likely does not include weekly feedback. Therefore, we would need to collect a tailored dataset to conduct this study. This would require partnering with professors from various institutions who teach large-size classes, where students would be given the opportunity to leave a one-sentence comment each week about their feelings and experiences in the course.
With our envisioned study and desired dataset, we need data that captures at least the following components: an auto-generated student ID to anonymize individuals while tracking feedback over time, the course metadata to provide context for the feedback, temporal markers (e.g., Week 1, Week 2, Week 3) to analyze changes across the course duration, and the text or comment itself, which reflects students’ perceptions, attitudes, or experiences in the course.
For the data analysis, we applied Correlated Topic Modeling (CTM), an extension of Latent Dirichlet Allocation (LDA) designed to identify latent topics in large text datasets. CTM, like LDA, operates under the assumption that documents are represented as a “bag of words,” disregarding word order and sentence structure. The model begins by constructing a word matrix, which is then factorized to identify patterns in word usage.
We can describe LDA with this equation (Blei, 2012):
$$ p(\beta_{1:K}, \theta_{1:D}, z_{1:D}, w_{1:D}) = \prod_{i=1}^{K} p(\beta_i) \prod_{d=1}^{D} \left( p(\theta_d) \prod_{n=1}^{N_d} \Big( p(z_{d,n} \mid \theta_d) \cdot p(w_{d,n} \mid \beta_{1:K}, z_{d,n}) \Big) \right). $$
Topics here are \( \beta_{1:K} \), where each \( \beta_k \) is a distribution over the vocabulary (the distributions over words shown at the left in Figure 1). The topic proportions for the \( d \)-th document are \( \theta_d \), where \( \theta_{d,k} \) is the topic proportion for topic \( k \) in document \( d \) (the cartoon histogram in Figure 1). The topic assignments for the \( d \)-th document are \( z_d \), where \( z_{d,n} \) is the topic assignment for the \( n \)-th word in document \( d \) (the colored coin in Figure 1). The observed words for document \( d \) are \( w_d \), where \( w_{d,n} \) is the \( n \)-th word in document \( d \), which is an element from the fixed vocabulary.
Unlike LDA, CTM incorporates a logistic normal distribution to account for correlations between topics, allowing for a more nuanced representation of topic relationships. The model outputs the probabilities of words within each topic and the probabilities of topics within each document. We then used 10-fold cross-validation to identify the optimal number of topics.
Based on previous studies’ analysis of open-ended course evaluation comments, we anticipate uncovering topic themes, some of which will be primarily discussed in positive comments, some in negative comments, and some mentioned in both. In addition, because of the repeated weekly analysis, we anticipate that topics will have varying frequencies at different times of the semester, depending on what subjects are being covered, when assignments are due, and whether the teacher is adapting their teaching methods.
Collecting student feedback weekly and analyzing comment sentiment over time will allow teachers and administrators to evaluate the direct impact of their interventions more frequently than once or twice a semester. Future researchers may wish to investigate whether a given topic correlates with students’ overall satisfaction and learning. This would signal a hierarchy of course improvements.
References
Aznar-Mas, L. E., Atarés-Huerta, L., & Marin-Garcia, J. A. (2023). Effectiveness of the use of open-ended questions in student evaluation of teaching in an engineering degree. Journal of Industrial Engineering and Management, 16(3), 521–534. https://doi.org/10.3926/jiem.5620
Blei, D. M. (2012). Probabilistic topic models. Communications of the ACM, 55(4), 77–84. https://doi.org/10.1145/2133806.2133826
Brandl, K., Mandel, J., & Winegarden, B. (2017). Student evaluation team focus groups increase students’ satisfaction with the overall course evaluation process. Medical Education, 51(2), 215–227. https://doi.org/10.1111/medu.13104
Harvey, L. (2003). Student feedback [1]. Quality in Higher Education, 9(1), 3–20. https://doi.org/10.1080/13538320308164
Marshall, P. (2022). Contribution of open-ended questions in student evaluation of teaching. Higher Education Research & Development, 41(6), 1992–2005. https://doi.org/10.1080/07294360.2021.1967887
O’Connell-Domenech, A. (2024). College enrollment could take a big hit in 2025. Here’s why. The Hill: Nexstar Media, Inc. https://thehill.com/changing-america/enrichment/education/4398533-college-enrollment-could-take-a-big-hit-in-2025-heres-why/
Silge, J., & Robinson, D. (2017). Chapter 6: Topic modeling. In Text mining with R: A tidy approach (pp. 89–108). O’Reilly.
Sozer, E. M., Zeybekoglu, Z., & Kaya, M. (2019). Using mid-semester course evaluation as a feedback tool for improving learning and teaching in higher education. Assessment & Evaluation in Higher Education, 44(7), 1003–1016. https://doi.org/10.1080/02602938.2018.1564810
Spencer, K. J., & Schmelkin, L. P. (2002). Student perspectives on teaching and its evaluation. Assessment & Evaluation in Higher Education, 27(5), 397–409. https://doi.org/10.1080/0260293022000009285
Sun, J., & Yan, L. (2023). Using topic modeling to understand comments in student evaluations of teaching. Discover Education, 2, 25. https://doi.org/10.1007/s44217-023-00051-0
Williamson, A. L., & Wang, I. G. (2023). Redesigning a course evaluation instrument: Experience, practical guidance, and lessons learned. Journal of Management Education, 47(4), 388–416. https://doi.org/10.1177/10525629231167296